Demo
library("tidyverse")
library("palmerpenguins")
library("janitor")
# Print a compact overview of the raw data: one line per column showing its
# name, type and first few values.
glimpse(penguins_raw)
## Rows: 344
## Columns: 17
## $ studyName <chr> "PAL0708", "PAL0708", "PAL0708", "PAL0708", "PAL…
## $ `Sample Number` <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ Species <chr> "Adelie Penguin (Pygoscelis adeliae)", "Adelie P…
## $ Region <chr> "Anvers", "Anvers", "Anvers", "Anvers", "Anvers"…
## $ Island <chr> "Torgersen", "Torgersen", "Torgersen", "Torgerse…
## $ Stage <chr> "Adult, 1 Egg Stage", "Adult, 1 Egg Stage", "Adu…
## $ `Individual ID` <chr> "N1A1", "N1A2", "N2A1", "N2A2", "N3A1", "N3A2", …
## $ `Clutch Completion` <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", …
## $ `Date Egg` <date> 2007-11-11, 2007-11-11, 2007-11-16, 2007-11-16,…
## $ `Culmen Length (mm)` <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34…
## $ `Culmen Depth (mm)` <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18…
## $ `Flipper Length (mm)` <dbl> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190,…
## $ `Body Mass (g)` <dbl> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 34…
## $ Sex <chr> "MALE", "FEMALE", "FEMALE", NA, "FEMALE", "MALE"…
## $ `Delta 15 N (o/oo)` <dbl> NA, 8.94956, 8.36821, NA, 8.76651, 8.66496, 9.18…
## $ `Delta 13 C (o/oo)` <dbl> NA, -24.69454, -25.33302, NA, -25.32426, -25.298…
## $ Comments <chr> "Not enough blood for isotopes.", NA, NA, "Adult…
# Keep a copy of the original column names before they are cleaned.
old_names = names(penguins_raw)

# Standardise the column names (lower case, words separated by underscores).
penguins = janitor::clean_names(penguins_raw)

# Store and display the cleaned column names.
new_names = names(penguins)
new_names
## [1] "study_name" "sample_number" "species"
## [4] "region" "island" "stage"
## [7] "individual_id" "clutch_completion" "date_egg"
## [10] "culmen_length_mm" "culmen_depth_mm" "flipper_length_mm"
## [13] "body_mass_g" "sex" "delta_15_n_o_oo"
## [16] "delta_13_c_o_oo" "comments"
library("ggplot2")
# Shorten each species label to its first word,
# e.g. "Adelie Penguin (Pygoscelis adeliae)" becomes "Adelie".
penguins = penguins %>%
  mutate(species = stringr::word(species, 1))

# Scatter plot of flipper length against body mass, coloured by species.
ggplot(
  penguins,
  aes(x = body_mass_g, y = flipper_length_mm, colour = species)
) +
  geom_point() +
  # tidy up the labels
  labs(
    x = "Body mass (g)",
    y = "Flipper length (mm)",
    colour = "Species"
  )
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# To save the plot, run: ggsave(filename = "myfirstggplot.png")
Exercises
- Generate a scatter plot for another pair of (numeric) variables
# Scatter plot of culmen depth against culmen length, coloured by species.
# Stored in `plt` so further layers can be added to it later.
plt = penguins %>%
  ggplot() +
  aes(x = culmen_length_mm,
      y = culmen_depth_mm,
      colour = species) +
  geom_point() +
  # axis and legend titles (y label previously misspelled as "Culment")
  labs(x = "Culmen Length (mm)",
       y = "Culmen Depth (mm)",
       colour = "Species")
plt
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

- Colour by sex and use facet_wrap() to generate a plot for each
species and island combination.
# Scatter plot of culmen depth against culmen length, coloured by sex.
plt2 = penguins %>%
  ggplot() +
  aes(x = culmen_length_mm,
      y = culmen_depth_mm,
      colour = sex) +
  geom_point() +
  # axis and legend titles: y label spelling fixed ("Culmen"), and the legend
  # title capitalised to match the "Species" legends of the other plots
  labs(x = "Culmen Length (mm)",
       y = "Culmen Depth (mm)",
       colour = "Sex")
# One panel for each species/island combination present in the data.
plt2 + facet_wrap(vars(species, island))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

- Try including a line of best fit by adding another geometry layer
geom_smooth(method = "lm").
# Add a linear-model ("lm") line of best fit on top of the points in `plt`.
plt + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

- Use a different geometry, geom_histogram() to create a histogram for
flipper length, coloured by species.
# Histogram of flipper length, with the bars filled by species.
plt3 = ggplot(penguins, aes(x = flipper_length_mm, fill = species)) +
  geom_histogram() +
  labs(
    x = "Flipper Length (mm)",
    fill = "Species"
  )
plt3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_bin()`).
